df <- read.csv("CrowNestlingClimate.csv", h=TRUE)
#Select variables
df <- df %>%
select(Year,Name,NestName,ID,AgeField,CalcAge,HatchDateJul,HatchDateJulYear,AllSex,BillNT,BillWidth,BillDepth,TEC,Head,UpperBill,UBillSurface,TotBillSurface,Skull,Tarsus,Weight)
#Rename variables
df <- df %>%
rename(FieldAge=AgeField,BNT=BillNT,BW=BillWidth,BD=BillDepth,UB=UpperBill,UBS=UBillSurface,TBS=TotBillSurface)
#Count NAs
countNAs <- sapply(df, function(x) sum(is.na(x)))
countNAs
## Year Name NestName ID
## 0 3 0 0
## FieldAge CalcAge HatchDateJul HatchDateJulYear
## 162 4 3 3
## AllSex BNT BW BD
## 0 11 15 15
## TEC Head UB UBS
## 12 13 2 2
## TBS Skull Tarsus Weight
## 0 16 8 20
#Remove NAs
df <- df %>%
filter_at(vars(Weight,HatchDateJul,BD,Tarsus,Skull,BW), all_vars(!is.na(.)))
#Recount NAs
countNAs <- sapply(df, function(x) sum(is.na(x)))
countNAs
## Year Name NestName ID
## 0 3 0 0
## FieldAge CalcAge HatchDateJul HatchDateJulYear
## 160 0 0 0
## AllSex BNT BW BD
## 0 0 0 0
## TEC Head UB UBS
## 0 0 0 0
## TBS Skull Tarsus Weight
## 0 0 0 0
#filter out names with 'doa' or 'dead'
df <- df %>% filter(!grepl("doa|dead",Name))
#filter out Weights < 160
df <- df %>% filter(Weight > 160)
range(df$Weight)
## [1] 163 500
WeightByFieldAge.plot <- ggplot(data = df, aes(x=FieldAge,y=Weight,label=ID,color=HatchDateJul))+
geom_point()
ggplotly(WeightByFieldAge.plot)
df <- df %>% filter(between(CalcAge, 24,30))
range(df$CalcAge)
## [1] 24.0 29.9
WeightByCalcAge.plot <- ggplot(data = df, aes(x=CalcAge,y=Weight,label=ID))+
geom_point()
ggplotly(WeightByCalcAge.plot)
climate.df <- read.csv("ClimateMetrics.csv", h=TRUE)
df <- left_join(df,climate.df, by = "HatchDateJulYear")
df <- df %>% relocate(Date, .before = AllSex)
#write.csv(df, "AllNestlingsClimateJoined.csv")
#variables that don't get scaled
DataNotScaled.df <- df[,1:10]
#Numerical data that do get scaled
DataToScale.df <- df[,11:39]
#Scale those data
Scaled.df <- scale(DataToScale.df)
#Rejoin with variables that don't get scaled
scaled.df <- cbind(DataNotScaled.df,Scaled.df)
BD.scaled.mdl <- lm(data = scaled.df, BD ~ GDDSum12_22 * PrecipSum12_22 + Weight + CalcAge)
summary(BD.scaled.mdl)
##
## Call:
## lm(formula = BD ~ GDDSum12_22 * PrecipSum12_22 + Weight + CalcAge,
## data = scaled.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0716 -0.4851 -0.0221 0.4559 3.6906
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.02712 0.42291 -9.522 < 2e-16 ***
## GDDSum12_22 0.14414 0.01708 8.441 < 2e-16 ***
## PrecipSum12_22 -0.01170 0.01681 -0.696 0.487
## Weight 0.58078 0.01796 32.338 < 2e-16 ***
## CalcAge 0.15426 0.01616 9.546 < 2e-16 ***
## GDDSum12_22:PrecipSum12_22 -0.07563 0.01559 -4.852 1.32e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7508 on 2042 degrees of freedom
## Multiple R-squared: 0.4376, Adjusted R-squared: 0.4363
## F-statistic: 317.8 on 5 and 2042 DF, p-value: < 2.2e-16